HOMEWORK 7¶

Необходимые импорты¶

In [2]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

import tabulate
from tabulate import tabulate

TRAIN¶

Подготовка белка¶

In [3]:
pdb=pmx.Model('new.B99990001.pdb')

for r in pdb.residues[145:]:
    print r # посмотрим остатки
<Molecule: id = 146 name = CYS chain_id =    natoms = 6>
<Molecule: id = 147 name = GLY chain_id =    natoms = 4>
<Molecule: id = 148 name = VAL chain_id =    natoms = 8>
<Molecule: id = 149 name = NAG chain_id =    natoms = 14>
<Molecule: id = 150 name = NAG chain_id =    natoms = 14>
<Molecule: id = 151 name = NDG chain_id =    natoms = 15>

Последние три остатка - лиганд. Теперь их надо разъединить.

In [4]:
# создание объекта белок
newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
    newpdb.remove_residue(r)
newpdb.writePDB('newPDB.pdb')

# создание объекта лиганд
lig = pdb.copy()
del lig.residues[:-3]    

Найдем геометрический центр лиганда.

In [5]:
s = np.zeros(3)
for a in lig.atoms:
    s = s + a.x 
ligand_center = s/len(lig.atoms)
ligand_center
Out[5]:
array([ 41.98401917,  29.62299   ,  22.24433583])

Подготовка белка для докинга¶

In [6]:
prot = oddt.toolkit.readfile('pdb','newPDB.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()
Out[6]:
True

Лиганды для докинга¶

In [7]:
def plot_smiles(smiles):
    images =[]

    for s in smiles:
        m = oddt.toolkit.readstring('smi', s)
        if not m.OBMol.Has3D(): 
            m.make3D(forcefield='mmff94', steps=150)
            m.removeh()
            m.OBMol.AddPolarHydrogens()

        mols.append(m)
        ###with print m.OBMol.Has3D() was found that:
        ### deep copy needed to keep 3D , write svg make mols flat
        images.append((SVG(copy.deepcopy(m).write('svg'))))

    display_svg(*images)
In [56]:
smiles = ['c1cccc(O)c1', 'c1c(O)ccc(O)c1','c1(O)cc(c2ccccc2)cc(O)c1']
mols = []
plot_smiles(smiles)
***** - Open Babel Depiction O H *****
***** - Open Babel Depiction O O H H *****
***** - Open Babel Depiction O O H H *****

Докинг¶

In [57]:
affinity = []
rmsd = []
In [58]:
def docking(prot, ligand_center, mols, name):
    # create docking object
    dock_obj = oddt.docking.AutodockVina.autodock_vina(
        protein=prot, size=(20,20,20), center=ligand_center, num_modes = 9,
        executable='/usr/bin/vina',autocleanup=True)

    print ('!!!')
    print " ".join(dock_obj.params)  # параметры почти все использовались те, что по дефолту
    print ('!!!')

    # do it
    res = dock_obj.dock(mols, prot)

    # Анализ докинга
    for i,r in enumerate(res):
        hbs = oddt.interactions.hbonds(prot,r)
        stack= oddt.interactions.pi_stacking(prot,r)
        phob = oddt.interactions.hydrophobic_contacts(prot,r)

    # Визуализация 
    for i,r in enumerate(res):
        r.write(filename=name % i, format='pdb', overwrite=True)
    aff = []
    rm = []
    # Результаты докинга
    for i,r in enumerate(res):
        aff.append(r.data['vina_affinity'])
        rm.append(r.data['vina_rmsd_ub'])
    affinity.append(aff)   
    rmsd.append(rm)
        #print "%10s%8s%8s" %(r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'])
In [59]:
docking(prot, ligand_center, mols, 'r%s.pdb')
!!!
--center_x 41.9840191667 --center_y 29.62299 --center_z 22.2443358333 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
!!!

AFFINITY

In [63]:
print tabulate(zip(['best','||','||','||','||','||','||','\/','worst'], affinity[0][0:9],affinity[0][9:18], affinity[0][18:27]), headers=['C6H6O', 'C6H6O2', 'C12H10O2'])
         C6H6O    C6H6O2    C12H10O2
-----  -------  --------  ----------
best      -4.2      -4.3        -6.1
||        -4.1      -4.3        -6.1
||        -3.9      -4.2        -6
||        -3.8      -4.1        -6
||        -3.8      -3.9        -6
||        -3.7      -3.6        -5.9
||        -3.6      -3.5        -5.8
\/        -3.5      -3          -5.2
worst     -3.4      -3          -4.5

RMSD

In [64]:
print tabulate(zip(['best','||','||','||','||','||','||','\/','worst'], rmsd[0][0:9],rmsd[0][9:18], rmsd[0][18:27]), headers=['C6H6O', 'C6H6O2', 'C12H10O2'])
         C6H6O    C6H6O2    C12H10O2
-----  -------  --------  ----------
best     0         0           0
||       1.829     3.656       2.19
||       2.705     3.231       5.333
||       2.188     3.085       5.187
||       2.059     1.994       5.399
||       3.134     3.781       5.798
||       3.928     3.504       1.497
\/       6.292    15.575       2.973
worst    3.862    14.402       2.953

По аффинности видно, что "лучше" всего подходит третий лиганд. Изобразим первые три "наилучших" конформации каждого лиганда.

C6H6O¶

In [20]:
Image(filename = "l1.png", width=500, height=500)    
Out[20]:

C6H6O2¶

In [18]:
Image(filename = "l2.png", width=500, height=500)
Out[18]:

C12H10O2¶

In [19]:
Image(filename = "l3.png", width=500, height=500)
Out[19]:

TASK¶

Белок для докинга:¶

In [26]:
prot = oddt.toolkit.readfile('pdb','newPDB.pdb').next()
prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()
Out[26]:
True
In [30]:
affinity = []
rmsd = []

В NAG СH3C(=O)NH группа заменяется на:¶

1) OH¶

In [31]:
mols = []
plot_smiles(['C1(C(C(C(C(O1)CO[H])O[H])O[H])O[H])[OH]'])

docking(prot, ligand_center, mols, name = 'OH1%s.pdb')
***** - Open Babel Depiction O H H H H H O O O O O *****
!!!
--center_x 41.9840191667 --center_y 29.62299 --center_z 22.2443358333 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
!!!
In [34]:
Image(filename = "OH_best.png", width=500, height=500)
Out[34]:
In [5]:
Image(filename = "OH_ligand.png", width=500, height=500)
Out[5]:

2) NH3+¶

In [35]:
mols = []
plot_smiles(['C1(C(C(C(C(O1)CO[H])O[H])O[H])O[H])[NH3+]'])

docking(prot, ligand_center, mols, name = 'NH3%s.pdb')
***** - Open Babel Depiction O H H H H H H H + N O O O O *****
!!!
--center_x 41.9840191667 --center_y 29.62299 --center_z 22.2443358333 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
!!!
In [7]:
Image(filename = "NH3_ligand.png", width=500, height=500)
Out[7]:
In [8]:
Image(filename = "NH3_best.png", width=500, height=500)
Out[8]:

3) H¶

In [37]:
mols = []
plot_smiles(['C1(C(C(C(C(O1)CO[H])O[H])O[H])[OH])[H]'])

docking(prot, ligand_center, mols, name = 'H%s.pdb')
***** - Open Babel Depiction O O O O O H H H H *****
!!!
--center_x 41.9840191667 --center_y 29.62299 --center_z 22.2443358333 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
!!!
In [9]:
Image(filename = "H_ligand.png", width=500, height=500)
Out[9]:
In [10]:
Image(filename = "H_best.png", width=500, height=500)
Out[10]:

4) Ph == C6H5¶

In [38]:
mols = []
plot_smiles(['C1(C(C(C(C(O1)O[H])O[H])O[H])C2CCCCC2)O[H]'])

docking(prot, ligand_center, mols, name = 'Ph%s.pdb')
***** - Open Babel Depiction H H H H O O O O O *****
!!!
--center_x 41.9840191667 --center_y 29.62299 --center_z 22.2443358333 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
!!!
In [11]:
Image(filename = "Ph_ligand.png", width=500, height=500)
Out[11]:
In [12]:
Image(filename = "Ph_best.png", width=500, height=500)
Out[12]:

5) COO-¶

In [39]:
mols = []
plot_smiles(['C1(C(C(C(C(O1)O[H])O[H])O[H])C(=O)O)O[H]'])

docking(prot, ligand_center, mols, name = 'COO%s.pdb')

#Image(filename = "ligand_NH3.png", width=500, height=500)
***** - Open Babel Depiction H H H H H O O O O O O O *****
!!!
--center_x 41.9840191667 --center_y 29.62299 --center_z 22.2443358333 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
!!!
In [13]:
Image(filename = "COO_ligand.png", width=500, height=500)
Out[13]:
In [14]:
Image(filename = "COO_best.png", width=500, height=500)
Out[14]:

TOTAL AFFINITY

In [50]:
print tabulate(zip(['best','||','||','||','||','||','||','\/','worst'], zip(*affinity)), headers=['order', ['OH  ', 'NH3+  ','H  ','Ph  ','COO']])
order    ['OH  ', 'NH3+  ', 'H  ', 'Ph  ', 'COO']
-------  ------------------------------------------
best     ('-5.3', '-5.4', '-5.0', '-6.3', '-5.7')
||       ('-5.3', '-5.2', '-4.9', '-6.2', '-5.6')
||       ('-5.1', '-5.0', '-4.7', '-5.7', '-5.4')
||       ('-5.0', '-4.9', '-4.7', '-5.6', '-5.4')
||       ('-4.9', '-4.9', '-4.7', '-5.0', '-5.3')
||       ('-4.9', '-4.8', '-4.7', '-4.3', '-5.2')
||       ('-4.8', '-4.7', '-4.5', '-4.2', '-4.9')
\/       ('-4.7', '-4.6', '-4.4', '-4.1', '-4.3')
worst    ('-4.7', '-4.5', '-4.4', '-4.1', '-3.9')

TOTAL RMSD

In [51]:
print tabulate(zip(['best','||','||','||','||','||','||','\/','worst'], zip(*rmsd)), headers=['order', ['OH   ', 'NH3+   ','H   ','Ph   ','COO']])
order    ['OH   ', 'NH3+   ', 'H   ', 'Ph   ', 'COO']
-------  -----------------------------------------------
best     ('0.000', '0.000', '0.000', '0.000', '0.000')
||       ('2.791', '2.907', '2.034', '2.259', '3.448')
||       ('2.025', '3.147', '2.688', '2.568', '3.480')
||       ('3.581', '3.433', '3.536', '5.251', '3.276')
||       ('3.418', '3.595', '4.405', '5.043', '3.047')
||       ('3.118', '3.301', '3.692', '12.197', '3.055')
||       ('4.120', '3.241', '3.138', '14.501', '2.278')
\/       ('1.978', '3.809', '3.137', '15.442', '2.194')
worst    ('3.238', '3.874', '3.391', '15.070', '13.827')
In [ ]: